# Summary stats table

source("scripts/zfunct_regs_header.R")
tab.path <- "results/table/"

#N obs 

Re_tots10 <- Re[Re$year == 1911,] %>% select(c("treat_10")) %>%
    group_by(treat_10) %>%
    count(na.rm = T)
Nu <- Re_tots10$n[!Re_tots10$treat_10]
Nt <- Re_tots10$n[Re_tots10$treat_10]
#Variables varying per year varying per year
Re$prl.pop.sh <- ifelse(Re$pop == 0, NA , Re$tot.prl / Re$pop)

Yt <- c("tot.reg", "par.onl",  "par.onl.sh")
Yt_lab <- c("Total Electors (hundreds)", 
    "Local Electors (hundreds)",
     "Share Local over Total Electors")
#Mean
sum_yt <- Re %>% group_by(dpost, treat_10) %>%
    mutate(tot.reg = tot.reg / 100,
     tot.prl = tot.prl / 100, 
     par.onl = par.onl / 100) %>%
    summarize_at(Yt, c("mean"), na.rm = T) 
sum_yt_pre <- subset(sum_yt, dpost == FALSE) %>% 
    rownames_to_column() %>%
    pivot_longer(!rowname, names_to = "col1", values_to = "col2") %>% 
    pivot_wider(names_from = "rowname", values_from = "col2") %>% 
    subset(!(col1 %in% c("dpost", "treat_10")))
names(sum_yt_pre) <- c("varname", "untreated", "treated")
sum_yt_pre$lab <- Yt_lab
sum_yt_post <- subset(sum_yt, dpost == TRUE)  %>% 
   rownames_to_column() %>%
    pivot_longer(!rowname, names_to = "col1", values_to = "col2") %>% 
    pivot_wider(names_from = "rowname", values_from = "col2") %>% 
    subset(!(col1 %in% c("dpost", "treat_10")))
names(sum_yt_post) <- c("varname", "untreated", "treated")
sum_yt_post$lab <- Yt_lab

# #Sd
sum_yt_sd <- Re %>% group_by(dpost, treat_10) %>%
    mutate(tot.reg = tot.reg / 100,
     tot.prl = tot.prl / 100, 
     par.onl = par.onl / 100) %>%
    summarize_at(Yt, c("sd"), na.rm = T) 
sum_yt_pre_sd <- subset(sum_yt_sd, dpost == FALSE) %>% 
    rownames_to_column() %>%
    pivot_longer(!rowname, names_to = "col1", values_to = "col2") %>% 
    pivot_wider(names_from = "rowname", values_from = "col2") %>% 
    subset(!(col1 %in% c("dpost", "treat_10")))
names(sum_yt_pre_sd) <- c("varname", "untreated", "treated")
sum_yt_pre_sd$lab <- Yt_lab
sum_yt_post_sd <- subset(sum_yt_sd, dpost == TRUE)  %>% 
   rownames_to_column() %>%
    pivot_longer(!rowname, names_to = "col1", values_to = "col2") %>% 
    pivot_wider(names_from = "rowname", values_from = "col2") %>% 
    subset(!(col1 %in% c("dpost", "treat_10")))
names(sum_yt_post_sd) <- c("varname", "untreated", "treated")
sum_yt_post_sd$lab <- Yt_lab

sum_yt_pre$tstat <- (sum_yt_pre$untreated - sum_yt_pre$treated)/
     sqrt((sum_yt_pre$untreated^2/Nu) + (sum_yt_pre$treated^2/Nt) )
sum_yt_pre$p_val <- round(2*pt(abs(sum_yt_pre$tstat), Inf, lower.tail = FALSE), 3)

sum_yt_post$tstat <- (sum_yt_post$untreated - sum_yt_post$treated)/
     sqrt((sum_yt_post$untreated^2/Nu) + (sum_yt_post$treated^2/Nt) )
sum_yt_post$p_val <- round(2*pt(abs(sum_yt_post$tstat), Inf, lower.tail = FALSE), 3)

out1_pre <- data.frame( labl   =  sum_yt_pre$lab,
                    mean_u =  round(sum_yt_pre$untreated,  3),
                    sd_u   =  round(sum_yt_pre_sd$treated,  3),
                    mean_t =  round(sum_yt_pre$treated,  3),
                    sd_t   =  round(sum_yt_pre_sd$treated,  3),
                    diff   =  round((sum_yt_pre$untreated-sum_yt_pre$treated),  3), 
                    pval   =  round(sum_yt_pre$p_val, 3)
)

out1_post <- data.frame( labl   =  sum_yt_post$lab,
                    mean_u =  round(sum_yt_post$untreated,  3),
                    sd_u   =  round(sum_yt_post_sd$treated,  3),
                    mean_t =  round(sum_yt_post$treated,  3),
                    sd_t   =  round(sum_yt_post_sd$treated,  3),
                    diff   =  round((sum_yt_post$untreated-sum_yt_post$treated),  3), 
                    pval   =  round(sum_yt_post$p_val, 3)
)

## Variables constant
#Controls of interest 


Kon <- c( "treat_10", "min.dist.city10k",  "pop" ,
    "dist.roads", "av_age",
    "sexr",  "single_", "tfr",
    "f_smam",
    "f_cel_4", 
    "m_cel_4",
    "fmar_pr", "ecmr",
    "hc1",  "hc3",  "hc2", "hc4",  "hc5")

Kon_Lab <- c("Distance to City (km)",
    "Population (thousands)",
    "Distance to Road (km)",
    "Average Age",
    "Female Share of Population",
    "Single Person HouseHolds, pct",
    "Total Fertility Rate (children per women)",
    "Age at Marriage for Women",
    "Female Celibacy Rate",
    "Male Celibacy Rate",
    "Married Women Working, pct" ,
    "Child Mortality Rate, per thousand",
    "HISCLASS High Skill Non-Manual, pct",
    "HISCLASS High Skill Manual, pct",
    "HISCLASS Low Skill Skill Non-Manual, pct",
    "HISCLASS Low Skill Manual, pct",
    "HISCLASS Unskilled"
 )

Re_sum <- Re %>% select(all_of(c("G_UNIT", "year", Kon))) %>%
    mutate(pop = pop/1000) %>%
    group_by(G_UNIT) %>% 
    summarize_at(c(Kon), mean, na.rm = T) %>%
    ungroup() %>%
    select(c(Kon)) %>%
    group_by(treat_10) %>%
    summarize_all(c("mean"), na.rm = T) %>%   #RESHAPE FROM HERE
    rownames_to_column() %>%
    pivot_longer(!rowname, names_to = "col1", values_to = "col2") %>% 
    pivot_wider(names_from = "rowname", values_from = "col2") %>% 
    subset(!(col1 %in% c( "treat_10")))
names(Re_sum) <- c("varname", "untreated", "treated")
Re_sum$lab <- Kon_Lab
Re_sum_sd <- Re %>% select(c("G_UNIT", "year", Kon)) %>%
    mutate(pop = pop/1000) %>%
    group_by(G_UNIT) %>% 
    summarize_at(c(Kon), mean, na.rm = T) %>%
    ungroup() %>%
    select(c(Kon)) %>%
    group_by(treat_10) %>%
    summarize_all(c("sd"), na.rm = T) %>%   #RESHAPE FROM HERE
    rownames_to_column() %>%
    pivot_longer(!rowname, names_to = "col1", values_to = "col2") %>% 
    pivot_wider(names_from = "rowname", values_from = "col2") %>% 
    subset(!(col1 %in% c("treat_10")))
names(Re_sum_sd) <- c("varname", "untreated", "treated")
Re_sum_sd$lab <- Kon_Lab

Re_sum$tstat <- (Re_sum$untreated - Re_sum$treated)/
     sqrt((Re_sum_sd$untreated^2/Nu) + (Re_sum_sd$treated^2/Nt) )
Re_sum$p_val <- round(2*pt(abs(Re_sum$tstat), Inf, lower.tail = FALSE), 3)


out2 <- data.frame( labl   =  Re_sum$lab,
                    mean_u =  round(Re_sum$untreated,  2),
                    sd_u   =  round(Re_sum_sd$treated,  2),
                    mean_t =  round(Re_sum$treated,  2),
                    sd_t   =  round(Re_sum_sd$treated,  2),
                    diff   =  round((Re_sum$untreated-Re_sum$treated),  2), 
                    pval   =  round(Re_sum$p_val, 2)
)

## Write Files 

write.table(out1_pre,
    file=paste0(tab.path, "tab_01_b1.tex"),
    sep=" & ", 
    quote =  FALSE, 
    col.names = FALSE ,
    row.names = FALSE, 
    eol = " \\\\ \n ")

write.table(out1_post,
    file=paste0(tab.path, "tab_01_b2.tex"),
    sep=" & ", 
    quote =  FALSE, 
    col.names = FALSE ,
    row.names = FALSE, 
    eol = " \\\\ \n ")

rm(list=
    setdiff(ls()[sapply(ls(), function(x) any(class(get(x)) == 'data.frame'))],
    c("Re","Ind")))